Rarity models (with IUCN evaluated species)
Simple rarity model
if (!file.exists("./vet_rare_naturalized_10k.rds")) {
vet_rare_nat <- brm(rare | trials(vetted) ~ Naturalised_scaled + diversification_scaled + new_age_scaled + (1|family) + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=12), cores=4)
saveRDS(vet_rare_nat, "./vet_rare_naturalized_10k.rds")
} else { vet_rare_nat <- readRDS("./vet_rare_naturalized_10k.rds")}
# vet_rare_nat
# pairs(vet_rare_nat)
outcome <- summary(vet_rare_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Number of naturalized species","Diversification rate","Family age","SD brownian effects","SD family specific effects")
vet_rare_nat_plot <- stanplot(vet_rare_nat, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nat_plot

pdf("./plots_tables/vet_rare_nat_plot.pdf", width=4, height=3)
vet_rare_nat_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Number of naturalized species |
-0.70 |
0.00 |
0.19 |
-1.07 |
-0.83 |
-0.70 |
-0.57 |
-0.34 |
4362.70 |
1.00 |
| Diversification rate |
1.20 |
0.00 |
0.31 |
0.61 |
0.99 |
1.20 |
1.41 |
1.81 |
4519.84 |
1.00 |
| Family age |
1.01 |
0.00 |
0.32 |
0.38 |
0.80 |
1.01 |
1.22 |
1.63 |
4402.76 |
1.00 |
| SD brownian effects |
1.00 |
0.01 |
0.21 |
0.62 |
0.86 |
1.00 |
1.14 |
1.43 |
853.74 |
1.00 |
| SD family specific effects |
0.61 |
0.01 |
0.15 |
0.22 |
0.54 |
0.64 |
0.72 |
0.86 |
538.28 |
1.01 |
bayes_R2(vet_rare_nat)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9983447 0.0003910453 0.9973925 0.998945
##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat <- brms::posterior_predict(vet_rare_nat)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_nat$data$rare, pp_vet_rare_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet_rare_nat <- apply(pp_vet_rare_nat, 1, function(x) RMSE(x,vet_rare_nat$data$rare))
# plot(density(rmse_vet_rare_nat), main="RMSE across posterior predictions")
mean(rmse_vet_rare_nat)
## [1] 6.178086
sd(rmse_vet_rare_nat)
## [1] 0.7116806
# LOO
loo(vet_rare_nat)
## Warning: Found 145 observations with a pareto_k > 0.7 in model
## 'vet_rare_nat'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 259 log-likelihood matrix
##
## Estimate SE
## elpd_loo -671.2 18.9
## p_loo 164.0 6.0
## looic 1342.4 37.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 56 21.6% 1009
## (0.5, 0.7] (ok) 58 22.4% 200
## (0.7, 1] (bad) 116 44.8% 20
## (1, Inf) (very bad) 29 11.2% 7
## See help('pareto-k-diagnostic') for details.
IUCN vetted full rarity model
if (!file.exists("./vet_rare_10k.rds")) {
vet_rare <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(vet_rare, "./vet_rare_10k.rds")
} else { vet_rare <- readRDS("./vet_rare_10k.rds")}
# vet_rare
outcome <- summary(vet_rare$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age","Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")
vet_rare_plot <- stanplot(vet_rare, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_plot

pdf("./plots_tables/vet_rare_plot.pdf", width=4, height=7)
vet_rare_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.46 |
0.01 |
0.34 |
-0.23 |
0.24 |
0.47 |
0.70 |
1.11 |
4629.61 |
1 |
| Famly age |
0.41 |
0.00 |
0.31 |
-0.21 |
0.20 |
0.42 |
0.62 |
1.01 |
3959.66 |
1 |
| Woodiness |
0.63 |
0.00 |
0.14 |
0.35 |
0.54 |
0.63 |
0.73 |
0.91 |
4465.82 |
1 |
| Herbaceous |
-1.08 |
0.00 |
0.23 |
-1.52 |
-1.24 |
-1.09 |
-0.92 |
-0.61 |
2553.18 |
1 |
| Climate sum |
-0.28 |
0.00 |
0.10 |
-0.47 |
-0.34 |
-0.28 |
-0.21 |
-0.09 |
4502.17 |
1 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4573.29 |
1 |
| Hermaphroditic |
0.17 |
0.00 |
0.23 |
-0.29 |
0.01 |
0.16 |
0.33 |
0.63 |
4412.89 |
1 |
| Dioecious |
-0.15 |
0.00 |
0.16 |
-0.46 |
-0.26 |
-0.15 |
-0.04 |
0.15 |
4776.48 |
1 |
| Monoecious |
-0.11 |
0.00 |
0.16 |
-0.43 |
-0.22 |
-0.11 |
-0.01 |
0.20 |
4729.80 |
1 |
| Asexual |
0.02 |
0.00 |
0.17 |
-0.31 |
-0.09 |
0.02 |
0.13 |
0.34 |
4575.64 |
1 |
| Fleshy fruit |
0.04 |
0.00 |
0.15 |
-0.26 |
-0.06 |
0.04 |
0.15 |
0.35 |
4665.84 |
1 |
| Animal pollinated |
0.60 |
0.00 |
0.26 |
0.10 |
0.42 |
0.60 |
0.77 |
1.10 |
4267.24 |
1 |
| SD brownian effects |
0.51 |
0.01 |
0.29 |
0.03 |
0.29 |
0.51 |
0.72 |
1.09 |
612.16 |
1 |
| SD family specific effects |
0.75 |
0.00 |
0.12 |
0.46 |
0.68 |
0.76 |
0.83 |
0.94 |
803.69 |
1 |
bayes_R2(vet_rare)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9983752 0.0003806278 0.9974841 0.9989616
##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(vet_rare)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet <- apply(pp_rare, 1, function(x) RMSE(x,vet_rare$data$rare))
# plot(density(rmse_vet), main="RMSE across posterior predictions")
mean(rmse_vet)
## [1] 6.31501
sd(rmse_vet)
## [1] 0.7443097
# LOO
loo(vet_rare)
## Warning: Found 131 observations with a pareto_k > 0.7 in model 'vet_rare'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
##
## Computed from 5000 by 243 log-likelihood matrix
##
## Estimate SE
## elpd_loo -633.7 19.3
## p_loo 155.6 6.6
## looic 1267.4 38.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 53 21.8% 1160
## (0.5, 0.7] (ok) 59 24.3% 209
## (0.7, 1] (bad) 107 44.0% 21
## (1, Inf) (very bad) 24 9.9% 5
## See help('pareto-k-diagnostic') for details.
IUCN vetted rarity model: no family level effects
if (!file.exists("./vet_rare_nofam_10k.rds")) {
vet_rare_nofam <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal,
dat=data_vetted, family=binomial(),
iter=10000, thin=4,
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(vet_rare_nofam, "./vet_rare_nofam_10k.rds")
} else { vet_rare_nofam <- readRDS("./vet_rare_nofam_10k.rds")}
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Start sampling
# vet_rare_nofam
outcome <- summary(vet_rare_nofam$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated")
vet_rare_nofam_plot <- stanplot(vet_rare_nofam, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.06 |
0 |
0.10 |
-0.13 |
0.00 |
0.06 |
0.13 |
0.25 |
4939.75 |
1 |
| Famly age |
0.06 |
0 |
0.09 |
-0.13 |
0.00 |
0.06 |
0.12 |
0.24 |
5070.41 |
1 |
| Woodiness |
0.52 |
0 |
0.03 |
0.45 |
0.49 |
0.51 |
0.54 |
0.58 |
4504.94 |
1 |
| Herbaceous |
-0.67 |
0 |
0.05 |
-0.77 |
-0.71 |
-0.67 |
-0.64 |
-0.57 |
4674.90 |
1 |
| Climate sum |
-0.42 |
0 |
0.02 |
-0.47 |
-0.44 |
-0.42 |
-0.40 |
-0.37 |
4854.91 |
1 |
| Number of species hybrids |
0.00 |
0 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4946.06 |
1 |
| Hermaphroditic |
0.30 |
0 |
0.06 |
0.18 |
0.26 |
0.30 |
0.34 |
0.42 |
4697.34 |
1 |
| Dioecious |
0.14 |
0 |
0.03 |
0.07 |
0.12 |
0.14 |
0.16 |
0.21 |
4622.04 |
1 |
| Monoecious |
-0.15 |
0 |
0.04 |
-0.23 |
-0.18 |
-0.15 |
-0.13 |
-0.08 |
4840.76 |
1 |
| Asexual |
0.14 |
0 |
0.04 |
0.07 |
0.12 |
0.14 |
0.16 |
0.21 |
5070.97 |
1 |
| Fleshy fruit |
-0.22 |
0 |
0.02 |
-0.26 |
-0.23 |
-0.22 |
-0.20 |
-0.17 |
4533.41 |
1 |
| Animal pollinated |
0.97 |
0 |
0.06 |
0.85 |
0.93 |
0.97 |
1.01 |
1.09 |
4877.15 |
1 |
vet_rare_nofam_plot

pdf("./plots_tables/vet_rare_nofam_plot.pdf", width=4, height=7)
vet_rare_nofam_plot
dev.off()
## png
## 2
bayes_R2(vet_rare_nofam)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9595333 0.002023691 0.9553965 0.9633137
##Posterior predictive checks
# first make posterior predictions
pp_rare_nofam <- brms::posterior_predict(vet_rare_nofam)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_nofam$data$rare, pp_rare_nofam[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_rare_nofam <- apply(pp_rare_nofam, 1, function(x) RMSE(x,vet_rare_nofam$data$rare))
plot(density(rmse_rare_nofam), main="RMSE across posterior predictions")

mean(rmse_rare_nofam)
## [1] 24.24252
sd(rmse_rare_nofam)
## [1] 1.362858
# LOO
loo(vet_rare_nofam)
## Warning: Found 16 observations with a pareto_k > 0.7 in model
## 'vet_rare_nofam'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 243 log-likelihood matrix
##
## Estimate SE
## elpd_loo -1490.9 164.5
## p_loo 211.1 63.3
## looic 2981.8 329.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 216 88.9% 288
## (0.5, 0.7] (ok) 11 4.5% 121
## (0.7, 1] (bad) 8 3.3% 16
## (1, Inf) (very bad) 8 3.3% 1
## See help('pareto-k-diagnostic') for details.
IUCN vetted rarity model: only non-brownian family effects
if (!file.exists("./vet_rare_nophylo_10k.rds")) {
vet_rare_nophylo <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4,
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(vet_rare_nophylo, "./vet_rare_nophylo_10k.rds")
} else { vet_rare_nophylo <- readRDS("./vet_rare_nophylo_10k.rds")}
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Start sampling
# vet_rare_nophylo
outcome <- summary(vet_rare_nophylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD family specific effects")
vet_rare_nophylo_plot <- stanplot(vet_rare_nophylo, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.49 |
0.01 |
0.34 |
-0.19 |
0.27 |
0.49 |
0.72 |
1.14 |
3008.71 |
1 |
| Famly age |
0.51 |
0.01 |
0.30 |
-0.10 |
0.31 |
0.51 |
0.70 |
1.10 |
3226.27 |
1 |
| Woodiness |
0.66 |
0.00 |
0.12 |
0.42 |
0.58 |
0.66 |
0.75 |
0.91 |
3433.68 |
1 |
| Herbaceous |
-1.19 |
0.00 |
0.19 |
-1.57 |
-1.32 |
-1.19 |
-1.06 |
-0.83 |
3513.24 |
1 |
| Climate sum |
-0.29 |
0.00 |
0.10 |
-0.48 |
-0.35 |
-0.29 |
-0.22 |
-0.10 |
3204.75 |
1 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4305.33 |
1 |
| Hermaphroditic |
0.16 |
0.00 |
0.23 |
-0.28 |
0.01 |
0.16 |
0.32 |
0.61 |
3335.70 |
1 |
| Dioecious |
-0.15 |
0.00 |
0.16 |
-0.46 |
-0.26 |
-0.15 |
-0.05 |
0.15 |
3225.54 |
1 |
| Monoecious |
-0.12 |
0.00 |
0.16 |
-0.42 |
-0.23 |
-0.12 |
-0.01 |
0.19 |
3168.32 |
1 |
| Asexual |
0.04 |
0.00 |
0.16 |
-0.28 |
-0.07 |
0.04 |
0.14 |
0.35 |
2925.31 |
1 |
| Fleshy fruit |
0.05 |
0.00 |
0.15 |
-0.25 |
-0.05 |
0.05 |
0.16 |
0.36 |
4062.99 |
1 |
| Animal pollinated |
0.67 |
0.00 |
0.24 |
0.21 |
0.52 |
0.67 |
0.83 |
1.14 |
3827.89 |
1 |
| SD family specific effects |
0.85 |
0.00 |
0.06 |
0.74 |
0.81 |
0.85 |
0.89 |
0.98 |
3620.69 |
1 |
vet_rare_nophylo_plot

pdf("./plots_tables/vet_rare_nophylo_plot.pdf", width=4, height=7)
vet_rare_nophylo_plot
dev.off()
## png
## 2
bayes_R2(vet_rare_nophylo)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9983705 0.000393171 0.9974769 0.9989715
##Posterior predictive checks
# first make posterior predictions
pp_rare_nophylo <- brms::posterior_predict(vet_rare_nophylo)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_nophylo$data$rare, pp_rare_nophylo[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_rare_nophylo <- apply(pp_rare_nophylo, 1, function(x) RMSE(x,vet_rare_nophylo$data$rare))
plot(density(rmse_rare_nophylo), main="RMSE across posterior predictions")

mean(rmse_rare_nophylo)
## [1] 6.325529
sd(rmse_rare_nophylo)
## [1] 0.76199
# LOO
loo(vet_rare_nophylo)
## Warning: Found 130 observations with a pareto_k > 0.7 in model
## 'vet_rare_nophylo'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 243 log-likelihood matrix
##
## Estimate SE
## elpd_loo -631.9 19.2
## p_loo 153.7 6.6
## looic 1263.9 38.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 61 25.1% 742
## (0.5, 0.7] (ok) 52 21.4% 155
## (0.7, 1] (bad) 110 45.3% 24
## (1, Inf) (very bad) 20 8.2% 6
## See help('pareto-k-diagnostic') for details.
IUCN vetted rarity model: only brownian family effects
if (!file.exists("./vet_rare_phylo_10k.rds")) {
vet_rare_phylo <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal+ (1|family),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(vet_rare_phylo, "./vet_rare_phylo_10k.rds")
} else { vet_rare_phylo <- readRDS("./vet_rare_phylo_10k.rds")}
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Start sampling
# vet_rare_phylo
outcome <- summary(vet_rare_phylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects")
vet_rare_phylo_plot <- stanplot(vet_rare_phylo, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.53 |
0.01 |
0.34 |
-0.12 |
0.30 |
0.53 |
0.75 |
1.20 |
4123.41 |
1 |
| Famly age |
0.31 |
0.01 |
0.34 |
-0.37 |
0.08 |
0.32 |
0.53 |
0.99 |
4024.22 |
1 |
| Woodiness |
0.59 |
0.00 |
0.14 |
0.31 |
0.50 |
0.59 |
0.68 |
0.86 |
3679.34 |
1 |
| Herbaceous |
-0.93 |
0.00 |
0.23 |
-1.38 |
-1.08 |
-0.93 |
-0.78 |
-0.49 |
3858.38 |
1 |
| Climate sum |
-0.26 |
0.00 |
0.10 |
-0.44 |
-0.32 |
-0.26 |
-0.19 |
-0.07 |
4016.73 |
1 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4554.56 |
1 |
| Hermaphroditic |
0.13 |
0.00 |
0.23 |
-0.32 |
-0.02 |
0.13 |
0.28 |
0.57 |
4164.96 |
1 |
| Dioecious |
-0.16 |
0.00 |
0.15 |
-0.45 |
-0.26 |
-0.15 |
-0.06 |
0.14 |
3712.41 |
1 |
| Monoecious |
-0.06 |
0.00 |
0.15 |
-0.37 |
-0.17 |
-0.06 |
0.05 |
0.23 |
3656.72 |
1 |
| Asexual |
-0.01 |
0.00 |
0.16 |
-0.33 |
-0.12 |
-0.01 |
0.10 |
0.30 |
3761.79 |
1 |
| Fleshy fruit |
-0.07 |
0.00 |
0.12 |
-0.31 |
-0.16 |
-0.07 |
0.01 |
0.17 |
4049.59 |
1 |
| Animal pollinated |
0.49 |
0.00 |
0.26 |
-0.01 |
0.32 |
0.49 |
0.67 |
1.01 |
3867.06 |
1 |
| SD brownian effects |
1.33 |
0.00 |
0.10 |
1.14 |
1.26 |
1.32 |
1.39 |
1.53 |
3845.59 |
1 |
vet_rare_phylo_plot

pdf("./plots_tables/vet_rare_phylo_plot.pdf", width=4, height=7)
vet_rare_phylo_plot
dev.off()
## png
## 2
bayes_R2(vet_rare_phylo)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9983544 0.0004038597 0.9974098 0.9989625
##Posterior predictive checks
# first make posterior predictions
pp_rare_phylo <- brms::posterior_predict(vet_rare_phylo)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_phylo$data$rare, pp_rare_phylo[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_rare_phylo <- apply(pp_rare_phylo, 1, function(x) RMSE(x,vet_rare_phylo$data$rare))
plot(density(rmse_rare_phylo), main="RMSE across posterior predictions")

mean(rmse_rare_phylo)
## [1] 6.322799
sd(rmse_rare_phylo)
## [1] 0.7548457
# LOO
loo(vet_rare_phylo)
## Warning: Found 139 observations with a pareto_k > 0.7 in model
## 'vet_rare_phylo'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 243 log-likelihood matrix
##
## Estimate SE
## elpd_loo -638.3 19.7
## p_loo 158.3 7.2
## looic 1276.6 39.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 59 24.3% 563
## (0.5, 0.7] (ok) 45 18.5% 189
## (0.7, 1] (bad) 111 45.7% 20
## (1, Inf) (very bad) 28 11.5% 9
## See help('pareto-k-diagnostic') for details.
Naturalized model
Proportion naturalized species
if (!file.exists("./fit_nat_10k.rds")) {
fit_nat <- brm(Naturalised | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name),
dat=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.8, max_treedepth=11))
saveRDS(fit_nat, "./fit_nat_10k.rds")
} else { fit_nat <- readRDS("./fit_nat_10k.rds")}
# fit_nat
outcome <- summary(fit_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")
fit_nat_plot <- stanplot(fit_nat, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
fit_nat_plot

pdf("./plots_tables/fit_nat_plot.pdf", width=4, height=7)
fit_nat_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
-1.65 |
0.00 |
0.33 |
-2.29 |
-1.88 |
-1.65 |
-1.42 |
-1.00 |
4580.08 |
1.00 |
| Famly age |
-0.92 |
0.01 |
0.35 |
-1.60 |
-1.16 |
-0.92 |
-0.68 |
-0.22 |
4521.70 |
1.00 |
| Woodiness |
-0.05 |
0.00 |
0.16 |
-0.37 |
-0.16 |
-0.04 |
0.06 |
0.27 |
4366.29 |
1.00 |
| Herbaceous |
0.95 |
0.00 |
0.26 |
0.45 |
0.78 |
0.95 |
1.12 |
1.46 |
4277.33 |
1.00 |
| Climate sum |
0.33 |
0.00 |
0.12 |
0.10 |
0.25 |
0.33 |
0.40 |
0.55 |
4231.76 |
1.00 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4635.04 |
1.00 |
| Hermaphroditic |
0.08 |
0.00 |
0.28 |
-0.47 |
-0.11 |
0.08 |
0.27 |
0.64 |
4162.15 |
1.00 |
| Dioecious |
-0.11 |
0.00 |
0.19 |
-0.50 |
-0.24 |
-0.11 |
0.02 |
0.26 |
4204.36 |
1.00 |
| Monoecious |
-0.17 |
0.00 |
0.19 |
-0.52 |
-0.29 |
-0.17 |
-0.04 |
0.22 |
4640.40 |
1.00 |
| Asexual |
0.40 |
0.00 |
0.20 |
0.00 |
0.27 |
0.40 |
0.54 |
0.80 |
4542.51 |
1.00 |
| Fleshy fruit |
0.00 |
0.00 |
0.17 |
-0.34 |
-0.11 |
0.00 |
0.11 |
0.33 |
4650.20 |
1.00 |
| Animal pollinated |
-0.80 |
0.00 |
0.32 |
-1.43 |
-1.03 |
-0.80 |
-0.58 |
-0.17 |
4228.21 |
1.00 |
| SD brownian effects |
1.76 |
0.01 |
0.22 |
1.29 |
1.61 |
1.78 |
1.91 |
2.13 |
670.57 |
1.01 |
| SD family specific effects |
0.47 |
0.01 |
0.24 |
0.03 |
0.29 |
0.49 |
0.65 |
0.91 |
449.64 |
1.01 |
bayes_R2(fit_nat)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9967621 0.0008056879 0.9948163 0.9979574
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.91 0.09 0.69 1 NA
## Post.Prob Star
## 1 NA *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
plot(hyp)

##Posterior predictive checks
# first make posterior predictions
pp_nat <- brms::posterior_predict(fit_nat)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=fit_nat$data$Naturalised, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

rmse_nat <- apply(pp_nat, 1, function(x) RMSE(x,fit_nat$data$Naturalised))
plot(density(rmse_nat), main="RMSE across posterior predictions")

mean(rmse_nat)
## [1] 7.183274
sd(rmse_nat)
## [1] 0.8720455
# LOO
loo(fit_nat)
## Warning: Found 180 observations with a pareto_k > 0.7 in model 'fit_nat'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
##
## Computed from 5000 by 357 log-likelihood matrix
##
## Estimate SE
## elpd_loo -788.4 30.5
## p_loo 207.0 9.5
## looic 1576.9 61.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 77 21.6% 1108
## (0.5, 0.7] (ok) 100 28.0% 203
## (0.7, 1] (bad) 146 40.9% 16
## (1, Inf) (very bad) 34 9.5% 8
## See help('pareto-k-diagnostic') for details.
Invasive model
Proportion invasive species
if (!file.exists("./fit_inv_10k.rds")) {
fit_inv <- brm(Invasive | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name),
dat=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.8, max_treedepth=11))
saveRDS(fit_inv, "./fit_inv_10k.rds")
} else { fit_inv <- readRDS("./fit_inv_10k.rds")}
# fit_inv
outcome <- summary(fit_inv$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")
fit_inv_plot <- stanplot(fit_inv, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of invasive species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
fit_inv_plot

pdf("./plots_tables/fit_inv_plot.pdf", width=4, height=7)
fit_inv_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
-1.02 |
0.01 |
0.45 |
-1.87 |
-1.32 |
-1.03 |
-0.72 |
-0.12 |
4407.15 |
1 |
| Famly age |
-1.30 |
0.01 |
0.43 |
-2.13 |
-1.60 |
-1.30 |
-1.02 |
-0.46 |
4580.42 |
1 |
| Woodiness |
0.21 |
0.00 |
0.19 |
-0.16 |
0.09 |
0.21 |
0.33 |
0.58 |
4970.14 |
1 |
| Herbaceous |
0.26 |
0.00 |
0.32 |
-0.37 |
0.04 |
0.26 |
0.47 |
0.89 |
4288.17 |
1 |
| Climate sum |
0.17 |
0.00 |
0.14 |
-0.11 |
0.07 |
0.17 |
0.26 |
0.43 |
4302.02 |
1 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4209.73 |
1 |
| Hermaphroditic |
-0.19 |
0.00 |
0.34 |
-0.84 |
-0.41 |
-0.20 |
0.03 |
0.48 |
4791.70 |
1 |
| Dioecious |
-0.09 |
0.00 |
0.21 |
-0.52 |
-0.23 |
-0.10 |
0.04 |
0.33 |
5011.49 |
1 |
| Monoecious |
-0.10 |
0.00 |
0.22 |
-0.52 |
-0.25 |
-0.10 |
0.04 |
0.33 |
4295.29 |
1 |
| Asexual |
0.48 |
0.00 |
0.22 |
0.04 |
0.33 |
0.48 |
0.63 |
0.92 |
4926.11 |
1 |
| Fleshy fruit |
-0.02 |
0.00 |
0.20 |
-0.40 |
-0.15 |
-0.02 |
0.11 |
0.36 |
4206.03 |
1 |
| Animal pollinated |
-0.32 |
0.01 |
0.37 |
-1.03 |
-0.57 |
-0.32 |
-0.07 |
0.42 |
3641.29 |
1 |
| SD brownian effects |
1.18 |
0.02 |
0.43 |
0.20 |
0.91 |
1.23 |
1.51 |
1.87 |
637.45 |
1 |
| SD family specific effects |
0.70 |
0.01 |
0.28 |
0.09 |
0.51 |
0.74 |
0.91 |
1.16 |
631.77 |
1 |
bayes_R2(fit_inv)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9858434 0.004242523 0.9752231 0.9916901
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_inv, hyp, class = NULL))
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0 0.67 0.27 0.03 1 NA
## Post.Prob Star
## 1 NA *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
plot(hyp)

##Posterior predictive checks
# first make posterior predictions
pp_inv <- brms::posterior_predict(fit_inv)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=fit_inv$data$Invasive, pp_inv[1:50, ]) + scale_x_continuous(trans="log1p")

rmse_inv <- apply(pp_inv, 1, function(x) RMSE(x,fit_inv$data$Invasive))
plot(density(rmse_inv), main="RMSE across posterior predictions")

mean(rmse_inv)
## [1] 3.023248
sd(rmse_inv)
## [1] 0.4356598
# LOO
loo(fit_inv)
## Warning: Found 114 observations with a pareto_k > 0.7 in model 'fit_inv'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
##
## Computed from 5000 by 357 log-likelihood matrix
##
## Estimate SE
## elpd_loo -468.6 28.1
## p_loo 133.3 10.4
## looic 937.3 56.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 156 43.7% 1042
## (0.5, 0.7] (ok) 87 24.4% 140
## (0.7, 1] (bad) 96 26.9% 27
## (1, Inf) (very bad) 18 5.0% 3
## See help('pareto-k-diagnostic') for details.
Sensitivity Analyses
Simple rarity model with quadratic age term
if (!file.exists("./vet_rare_naturalized_quad_age_10k.rds")) {
vet_rare_nat_qa <- brm(rare | trials(vetted) ~ Naturalised_scaled + diversification_scaled + new_age_scaled + I(new_age_scaled^2) + (1|family) + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=12), cores=4)
saveRDS(vet_rare_nat_qa, "./vet_rare_naturalized_quad_age_10k.rds")
} else { vet_rare_nat_qa <- readRDS("./vet_rare_naturalized_quad_age_10k.rds")}
# vet_rare_nat_qa
# pairs(vet_rare_nat_qa)
outcome <- summary(vet_rare_nat_qa$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Number of naturalized species","Diversification rate","Family age", "Family Age ^2","SD brownian effects","SD family specific effects")
vet_rare_nat_qa_plot <- stanplot(vet_rare_nat_qa, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nat_qa_plot

pdf("./plots_tables/vet_rare_nat_qa_plot.pdf", width=4, height=3)
vet_rare_nat_qa_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Number of naturalized species |
-0.73 |
0.00 |
0.20 |
-1.12 |
-0.86 |
-0.73 |
-0.60 |
-0.36 |
4409.60 |
1 |
| Diversification rate |
1.25 |
0.00 |
0.33 |
0.62 |
1.03 |
1.25 |
1.47 |
1.90 |
4692.22 |
1 |
| Family age |
1.03 |
0.00 |
0.32 |
0.40 |
0.82 |
1.03 |
1.25 |
1.67 |
4623.45 |
1 |
| Family Age ^2 |
-0.21 |
0.00 |
0.29 |
-0.80 |
-0.41 |
-0.21 |
-0.01 |
0.37 |
4802.55 |
1 |
| SD brownian effects |
0.98 |
0.01 |
0.20 |
0.59 |
0.84 |
0.98 |
1.12 |
1.38 |
1338.77 |
1 |
| SD family specific effects |
0.64 |
0.00 |
0.14 |
0.32 |
0.56 |
0.65 |
0.74 |
0.88 |
1316.53 |
1 |
bayes_R2(vet_rare_nat_qa)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9983417 0.0003915706 0.997416 0.9989467
##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat_qa <- brms::posterior_predict(vet_rare_nat_qa)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_nat_qa$data$rare, pp_vet_rare_nat_qa[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet_rare_nat_qa <- apply(pp_vet_rare_nat_qa, 1, function(x) RMSE(x,vet_rare_nat_qa$data$rare))
plot(density(rmse_vet_rare_nat_qa), main="RMSE across posterior predictions")

mean(rmse_vet_rare_nat_qa)
## [1] 6.171456
sd(rmse_vet_rare_nat_qa)
## [1] 0.7207039
# LOO
loo(vet_rare_nat_qa)
## Warning: Found 153 observations with a pareto_k > 0.7 in model
## 'vet_rare_nat_qa'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 259 log-likelihood matrix
##
## Estimate SE
## elpd_loo -668.1 18.8
## p_loo 161.2 5.8
## looic 1336.3 37.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 55 21.2% 483
## (0.5, 0.7] (ok) 51 19.7% 214
## (0.7, 1] (bad) 132 51.0% 22
## (1, Inf) (very bad) 21 8.1% 12
## See help('pareto-k-diagnostic') for details.
IUCN vetted full rarity model with quadratic term
if (!file.exists("./vet_rare_quad_age_10k.rds")) {
vet_rare_qa <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + I(new_age_scaled^2) + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(vet_rare_qa, "./vet_rare_quad_age_10k.rds")
} else { vet_rare_qa <- readRDS("./vet_rare_quad_age_10k.rds")}
# vet_rare_qa
outcome <- summary(vet_rare_qa$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Family Age ^2", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")
vet_rare_qa_plot <- stanplot(vet_rare_qa, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_qa_plot

pdf("./plots_tables/vet_rare_qa_plot.pdf", width=4, height=7)
vet_rare_qa_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.46 |
0.01 |
0.35 |
-0.21 |
0.23 |
0.46 |
0.69 |
1.15 |
3961.81 |
1 |
| Famly age |
0.40 |
0.00 |
0.32 |
-0.22 |
0.19 |
0.40 |
0.62 |
1.03 |
4157.17 |
1 |
| Family Age ^2 |
0.00 |
0.00 |
0.28 |
-0.54 |
-0.19 |
0.00 |
0.19 |
0.55 |
4319.58 |
1 |
| Woodiness |
0.63 |
0.00 |
0.14 |
0.35 |
0.54 |
0.63 |
0.72 |
0.91 |
4335.46 |
1 |
| Herbaceous |
-1.08 |
0.00 |
0.23 |
-1.54 |
-1.23 |
-1.08 |
-0.93 |
-0.61 |
3203.40 |
1 |
| Climate sum |
-0.28 |
0.00 |
0.10 |
-0.47 |
-0.34 |
-0.28 |
-0.21 |
-0.09 |
4217.03 |
1 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4796.10 |
1 |
| Hermaphroditic |
0.16 |
0.00 |
0.23 |
-0.29 |
0.01 |
0.16 |
0.31 |
0.62 |
4124.98 |
1 |
| Dioecious |
-0.15 |
0.00 |
0.16 |
-0.46 |
-0.26 |
-0.15 |
-0.05 |
0.16 |
4830.01 |
1 |
| Monoecious |
-0.12 |
0.00 |
0.16 |
-0.44 |
-0.23 |
-0.12 |
0.00 |
0.21 |
4678.97 |
1 |
| Asexual |
0.02 |
0.00 |
0.17 |
-0.30 |
-0.09 |
0.02 |
0.13 |
0.36 |
4749.06 |
1 |
| Fleshy fruit |
0.04 |
0.00 |
0.16 |
-0.26 |
-0.06 |
0.04 |
0.15 |
0.36 |
3927.81 |
1 |
| Animal pollinated |
0.59 |
0.00 |
0.25 |
0.11 |
0.41 |
0.59 |
0.76 |
1.08 |
4061.07 |
1 |
| SD brownian effects |
0.55 |
0.01 |
0.28 |
0.04 |
0.34 |
0.56 |
0.75 |
1.10 |
643.91 |
1 |
| SD family specific effects |
0.74 |
0.00 |
0.12 |
0.47 |
0.67 |
0.75 |
0.82 |
0.94 |
850.53 |
1 |
bayes_R2(vet_rare_qa)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9983654 0.0004019589 0.9973731 0.9989681
##Posterior predictive checks
# first make posterior predictions
pp_rare_qa <- brms::posterior_predict(vet_rare_qa)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_qa$data$rare, pp_rare_qa[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet_qa <- apply(pp_rare_qa, 1, function(x) RMSE(x,vet_rare_qa$data$rare))
plot(density(rmse_vet_qa), main="RMSE across posterior predictions")

mean(rmse_vet_qa)
## [1] 6.306899
sd(rmse_vet_qa)
## [1] 0.7333391
# LOO
loo(vet_rare)
## Warning: Found 131 observations with a pareto_k > 0.7 in model 'vet_rare'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
##
## Computed from 5000 by 243 log-likelihood matrix
##
## Estimate SE
## elpd_loo -633.7 19.3
## p_loo 155.6 6.6
## looic 1267.4 38.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 53 21.8% 1160
## (0.5, 0.7] (ok) 59 24.3% 209
## (0.7, 1] (bad) 107 44.0% 21
## (1, Inf) (very bad) 24 9.9% 5
## See help('pareto-k-diagnostic') for details.
IUCN non-vetted full rarity model
if (!file.exists("./nonvet_rare_10k.rds")) {
nonvet_rare <- brm(rare | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name),
dat=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(nonvet_rare, "./nonvet_rare_10k.rds")
} else { nonvet_rare <- readRDS("./nonvet_rare_10k.rds")}
# nonvet_rare
outcome <- summary(nonvet_rare$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")
nonvet_rare_plot <- stanplot(nonvet_rare, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
nonvet_rare_plot

pdf("./plots_tables/nonvet_rare_plot.pdf", width=4, height=7)
nonvet_rare_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.68 |
0.01 |
0.36 |
0.00 |
0.44 |
0.68 |
0.92 |
1.42 |
4441.99 |
1.00 |
| Famly age |
0.50 |
0.00 |
0.32 |
-0.10 |
0.29 |
0.50 |
0.71 |
1.16 |
4552.94 |
1.00 |
| Woodiness |
-0.42 |
0.00 |
0.16 |
-0.74 |
-0.53 |
-0.42 |
-0.31 |
-0.11 |
3988.19 |
1.00 |
| Herbaceous |
-0.60 |
0.00 |
0.25 |
-1.11 |
-0.77 |
-0.59 |
-0.43 |
-0.10 |
3551.10 |
1.00 |
| Climate sum |
-0.19 |
0.00 |
0.11 |
-0.41 |
-0.27 |
-0.19 |
-0.12 |
0.02 |
4535.94 |
1.00 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4766.30 |
1.00 |
| Hermaphroditic |
-0.12 |
0.00 |
0.27 |
-0.63 |
-0.29 |
-0.12 |
0.07 |
0.41 |
4368.39 |
1.00 |
| Dioecious |
-0.19 |
0.00 |
0.19 |
-0.56 |
-0.32 |
-0.19 |
-0.07 |
0.18 |
4424.59 |
1.00 |
| Monoecious |
0.08 |
0.00 |
0.19 |
-0.29 |
-0.04 |
0.08 |
0.21 |
0.44 |
4441.74 |
1.00 |
| Asexual |
0.15 |
0.00 |
0.21 |
-0.25 |
0.01 |
0.15 |
0.29 |
0.55 |
4520.87 |
1.00 |
| Fleshy fruit |
0.24 |
0.00 |
0.18 |
-0.13 |
0.11 |
0.23 |
0.36 |
0.59 |
4577.38 |
1.00 |
| Animal pollinated |
0.16 |
0.00 |
0.31 |
-0.44 |
-0.05 |
0.16 |
0.37 |
0.76 |
4464.41 |
1.00 |
| SD brownian effects |
0.69 |
0.01 |
0.31 |
0.08 |
0.47 |
0.70 |
0.91 |
1.28 |
472.67 |
1.01 |
| SD family specific effects |
1.07 |
0.00 |
0.12 |
0.80 |
0.99 |
1.07 |
1.15 |
1.29 |
794.26 |
1.01 |
bayes_R2(nonvet_rare)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9964116 0.0007896307 0.9946255 0.9976707
##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(nonvet_rare)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=nonvet_rare$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_nonvet <- apply(pp_rare, 1, function(x) RMSE(x,nonvet_rare$data$rare))
plot(density(rmse_nonvet), main="RMSE across posterior predictions")

mean(rmse_nonvet)
## [1] 7.977609
sd(rmse_nonvet)
## [1] 0.8493576
# LOO
loo(nonvet_rare)
## Warning: Found 192 observations with a pareto_k > 0.7 in model
## 'nonvet_rare'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 357 log-likelihood matrix
##
## Estimate SE
## elpd_loo -816.6 31.4
## p_loo 208.9 9.6
## looic 1633.3 62.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 88 24.6% 599
## (0.5, 0.7] (ok) 77 21.6% 112
## (0.7, 1] (bad) 161 45.1% 20
## (1, Inf) (very bad) 31 8.7% 4
## See help('pareto-k-diagnostic') for details.
IUCN non-vetted full rarity model with quadratic age term
if (!file.exists("./nonvet_rare_qa_10k.rds")) {
nonvet_rare_qa <- brm(rare | trials(species) ~ diversification_scaled + new_age_scaled + I(new_age_scaled^2) + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name),
dat=data, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov),
control=list(adapt_delta=0.95,max_treedepth=13))
saveRDS(nonvet_rare_qa, "./nonvet_rare_qa_10k.rds")
} else { nonvet_rare_qa <- readRDS("./nonvet_rare_qa_10k.rds")}
# nonvet_rare_qa
outcome <- summary(nonvet_rare_qa$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Diversification rate","Famly age", "Family Age ^2", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")
nonvet_rare_qa_plot <- stanplot(nonvet_rare_qa, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
nonvet_rare_qa_plot

pdf("./plots_tables/nonvet_rare_qa_plot.pdf", width=4, height=7)
nonvet_rare_qa_plot
dev.off()
## png
## 2
rownames(to_print) <- names1
kable(to_print, digits=2)
| Diversification rate |
0.74 |
0.01 |
0.37 |
0.02 |
0.49 |
0.74 |
0.98 |
1.47 |
4071.21 |
1 |
| Famly age |
0.52 |
0.00 |
0.32 |
-0.11 |
0.31 |
0.52 |
0.73 |
1.16 |
4734.89 |
1 |
| Family Age ^2 |
-0.27 |
0.00 |
0.33 |
-0.92 |
-0.49 |
-0.27 |
-0.04 |
0.37 |
4443.68 |
1 |
| Woodiness |
-0.43 |
0.00 |
0.16 |
-0.74 |
-0.54 |
-0.43 |
-0.32 |
-0.10 |
4232.39 |
1 |
| Herbaceous |
-0.60 |
0.00 |
0.26 |
-1.10 |
-0.77 |
-0.60 |
-0.43 |
-0.10 |
3301.34 |
1 |
| Climate sum |
-0.20 |
0.00 |
0.11 |
-0.41 |
-0.27 |
-0.20 |
-0.12 |
0.02 |
4702.55 |
1 |
| Number of species hybrids |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
4651.11 |
1 |
| Hermaphroditic |
-0.14 |
0.00 |
0.27 |
-0.65 |
-0.32 |
-0.13 |
0.04 |
0.38 |
4728.11 |
1 |
| Dioecious |
-0.21 |
0.00 |
0.19 |
-0.58 |
-0.33 |
-0.21 |
-0.08 |
0.17 |
4027.80 |
1 |
| Monoecious |
0.07 |
0.00 |
0.19 |
-0.30 |
-0.06 |
0.07 |
0.19 |
0.44 |
4394.00 |
1 |
| Asexual |
0.14 |
0.00 |
0.21 |
-0.25 |
0.00 |
0.14 |
0.29 |
0.55 |
4146.01 |
1 |
| Fleshy fruit |
0.26 |
0.00 |
0.18 |
-0.11 |
0.13 |
0.26 |
0.39 |
0.61 |
4615.45 |
1 |
| Animal pollinated |
0.14 |
0.00 |
0.31 |
-0.45 |
-0.06 |
0.14 |
0.34 |
0.74 |
4548.91 |
1 |
| SD brownian effects |
0.69 |
0.01 |
0.30 |
0.08 |
0.49 |
0.69 |
0.90 |
1.28 |
645.05 |
1 |
| SD family specific effects |
1.07 |
0.00 |
0.12 |
0.82 |
1.00 |
1.08 |
1.16 |
1.29 |
994.13 |
1 |
bayes_R2(nonvet_rare_qa)
## Estimate Est.Error Q2.5 Q97.5
## R2 0.9964198 0.000800492 0.9945435 0.9977011
##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(nonvet_rare_qa)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=nonvet_rare_qa$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_nonvet_qa <- apply(pp_rare, 1, function(x) RMSE(x,nonvet_rare_qa$data$rare))
plot(density(rmse_nonvet_qa), main="RMSE across posterior predictions")

mean(rmse_nonvet_qa)
## [1] 7.964437
sd(rmse_nonvet_qa)
## [1] 0.870164
# LOO
loo(nonvet_rare_qa)
## Warning: Found 198 observations with a pareto_k > 0.7 in model
## 'nonvet_rare_qa'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
##
## Computed from 5000 by 357 log-likelihood matrix
##
## Estimate SE
## elpd_loo -817.7 31.8
## p_loo 210.6 10.1
## looic 1635.4 63.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 86 24.1% 557
## (0.5, 0.7] (ok) 73 20.4% 189
## (0.7, 1] (bad) 166 46.5% 32
## (1, Inf) (very bad) 32 9.0% 3
## See help('pareto-k-diagnostic') for details.
With clade age + species, but no diversification
if (!file.exists("./vet_rare_naturalized_spec_age_10k.rds")) {
vet_rare_nat_v2 <- brm(rare | trials(vetted) ~ Naturalised_scaled + species_scaled + new_age_scaled + (1|family) + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=12), cores=4)
saveRDS(vet_rare_nat_v2, "./vet_rare_naturalized_spec_age_10k.rds")
} else { vet_rare_nat_v2 <- readRDS("./vet_rare_naturalized_spec_age_10k.rds")}
# vet_rare_nat_v2
# pairs(vet_rare_nat_v2)
outcome <- summary(vet_rare_nat_v2$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Number of naturalized species","Number of species","Family age","SD brownian effects","SD family specific effects")
vet_rare_nat_v2_plot <- stanplot(vet_rare_nat_v2, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
vet_rare_nat_v2_plot
pdf("./plots_tables/vet_rare_nat_v2_plot.pdf", width=4, height=3)
vet_rare_nat_v2_plot
dev.off()
rownames(to_print) <- names1
kable(to_print, digits=2)
bayes_R2(vet_rare_nat_v2)
##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat_v2 <- brms::posterior_predict(vet_rare_nat_v2)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_nat_v2$data$rare, pp_vet_rare_nat_v2[1:50, ]) + scale_x_continuous(trans="log1p")
# RMSE
rmse_vet_rare_nat_v2 <- apply(pp_vet_rare_nat_v2, 1, function(x) RMSE(x,vet_rare_nat_v2$data$rare))
plot(density(rmse_vet_rare_nat_v2), main="RMSE across posterior predictions")
mean(rmse_vet_rare_nat_v2)
sd(rmse_vet_rare_nat_v2)
With diversification, clade age, and species
if (!file.exists("./vet_rare_naturalized_spec_age_div_10k.rds")) {
vet_rare_nat_v3 <- brm(rare | trials(vetted) ~ Naturalised_scaled +
diversification_scaled + species_scaled + new_age_scaled + (1|family) + (1|family_name),
dat=data_vetted, family=binomial(),
iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
control=list(adapt_delta=0.95,max_treedepth=12), cores=4)
saveRDS(vet_rare_nat_v3, "./vet_rare_naturalized_spec_age_div_10k.rds")
} else { vet_rare_nat_v3 <- readRDS("./vet_rare_naturalized_spec_age_div_10k.rds")}
# vet_rare_nat_v3
# pairs(vet_rare_nat_v3)
outcome <- summary(vet_rare_nat_v3$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
names1 <- c("Number of naturalized species","Diversification rate","Number of species","Family age","SD brownian effects","SD family specific effects")
vet_rare_nat_v3_plot <- stanplot(vet_rare_nat_v3, pars=rownames(to_print), type="intervals",
prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
vet_rare_nat_v3_plot
pdf("./plots_tables/vet_rare_nat_v3_plot.pdf", width=4, height=3)
vet_rare_nat_v3_plot
dev.off()
rownames(to_print) <- names1
kable(to_print, digits=2)
bayes_R2(vet_rare_nat_v3)
##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat_v3 <- brms::posterior_predict(vet_rare_nat_v3)
log1 <- scale_x_continuous(trans="log1p")
ppc_dens_overlay(y=vet_rare_nat_v3$data$rare, pp_vet_rare_nat_v3[1:50, ]) + scale_x_continuous(trans="log1p")
# RMSE
rmse_vet_rare_nat_v3 <- apply(pp_vet_rare_nat_v3, 1, function(x) RMSE(x,vet_rare_nat_v3$data$rare))
plot(density(rmse_vet_rare_nat_v3), main="RMSE across posterior predictions")
mean(rmse_vet_rare_nat_v3)
sd(rmse_vet_rare_nat_v3)